#PBS -N 02AB_trimgalore_fastqc
#PBS -A GT-wratcliff3
#PBS -q inferno
#PBS -l nodes=1:ppn=6
#PBS -l pmem=4gb
#PBS -l walltime=5:00:00
#PBS -j oe
#PBS -o logs/02AB_trimgalore_fastqc.out
    
# change to project directory
# PBS exports PBS_O_WORKDIR (the directory the job was submitted from). Fall
# back to the current directory when it is unset so the script can also be run
# by hand from the project root: an empty, unquoted argument would make `cd`
# silently jump to $HOME and run the rest of the pipeline there.
PROJECT_DIR=${PBS_O_WORKDIR:-$PWD}
echo "Changing directory to ${PROJECT_DIR}…"
cd "$PROJECT_DIR" || { echo "ERROR: cannot change directory to ${PROJECT_DIR}" >&2; exit 1; }
     
# activate environment
# The conda environment is named after the environment file scripts/<env>.yml.
# Refuse to guess when there is not exactly one such file: with several files
# `basename` would receive extra operands, and with none the glob would stay
# a literal pattern.
env_files=(scripts/*.yml)
if [[ ${#env_files[@]} -ne 1 || ! -e ${env_files[0]} ]]; then
  echo "ERROR: expected exactly one environment file matching scripts/*.yml" >&2
  exit 1
fi
PIPELINE_ENV=$(basename "${env_files[0]}" .yml)
echo "Activating conda environment ${PIPELINE_ENV}…"
source activate "$PIPELINE_ENV" || {
  echo "ERROR: could not activate conda environment ${PIPELINE_ENV}" >&2
  exit 1
}
    
# extract sample name
# PBS_ARRAYID is used as a 1-based index into the whitespace-separated sample
# names listed in raw_data/samplenames.txt.
samplenames_file=raw_data/samplenames.txt
if [[ ! -r $samplenames_file ]]; then
  echo "ERROR: cannot read ${samplenames_file}" >&2
  exit 1
fi

# `read -a` splits on whitespace (spaces, tabs, newlines) without glob-expanding
# the names. With `-d ''` the whole file is read in one go and `read` returns
# non-zero at end-of-file even on success, hence `|| true`.
samplenames=()
read -r -d '' -a samplenames < "$samplenames_file" || true

# Reject a missing, non-numeric or out-of-range index instead of silently
# picking the wrong (or an empty) sample name.
sampleindex=${PBS_ARRAYID:-}
if [[ ! $sampleindex =~ ^[1-9][0-9]*$ ]] || (( sampleindex > ${#samplenames[@]} )); then
  echo "ERROR: PBS_ARRAYID ('${sampleindex}') must be an integer between 1 and ${#samplenames[@]}" >&2
  exit 1
fi
samplename=${samplenames[sampleindex - 1]}

echo -e "\n########## Start processing sample ${sampleindex}: ${samplename} ##########\n"

echo -e "\n########## Starting Trim Galore ##########\n"

# input/output
in=raw_data
in_R1="$in/${samplename}_R1.fastq.gz"
in_R2="$in/${samplename}_R2.fastq.gz"
out=results/02A_trimgalore
mkdir -p "$out" || { echo "ERROR: cannot create ${out}" >&2; exit 1; }

# Fail early with a clear message when a read file is missing.
for fastq in "$in_R1" "$in_R2"; do
  if [[ ! -r $fastq ]]; then
    echo "ERROR: input file not found or not readable: ${fastq}" >&2
    exit 1
  fi
done

# parameters
min_length=40
# Default to a single core when PBS_NP is unset; an empty value would make
# --cores swallow the following option as its argument.
cores=${PBS_NP:-1}

# main
# Options are collected in an array so each can carry a comment and every
# value stays a single, correctly quoted word.
trim_galore_args=(
  --paired --retain_unpaired   # paired-end mode; keep reads whose mate was dropped
  --cores "$cores"
  --phred33
  --2colour 20                 # quality cutoff 20, two-colour chemistry trimming
  --illumina                   # trim the Illumina adapter sequence
  --stringency 3
  --length "$min_length"       # minimum read length kept after trimming
  --output_dir "$out"
  --basename "$samplename"
  "$in_R1" "$in_R2"
)
# Stop here on failure so the rename and FastQC steps never run on missing or
# partial output.
trim_galore "${trim_galore_args[@]}" || {
  echo "ERROR: trim_galore failed for sample ${samplename}" >&2
  exit 1
}

# rename files
#######################################
# Rename the Trim Galore outputs of one sample to the pipeline naming scheme:
#   <sample>_val_<n>.fq.gz           -> <sample>_paired_trimmed_R<n>.fastq.gz
#   <sample>_R<n>_unpaired_<n>.fq.gz -> <sample>_unpaired_trimmed_R<n>.fastq.gz
# Target names are assembled from the known suffixes rather than by pattern
# substitution on the whole path, so a directory or sample name that happens
# to contain "fq", "val_" or "R1_unpaired_" cannot corrupt the result.
# Arguments: $1 - directory holding the files
#            $2 - sample name (the --basename given to trim_galore)
# Outputs:   a warning on stderr for every expected file that is missing
#######################################
rename_trimmed_outputs() {
  local dir=$1 sample=$2
  local mate kind src dst
  for mate in 1 2; do
    for kind in paired unpaired; do
      if [[ $kind == paired ]]; then
        src="${dir}/${sample}_val_${mate}.fq.gz"
      else
        src="${dir}/${sample}_R${mate}_unpaired_${mate}.fq.gz"
      fi
      dst="${dir}/${sample}_${kind}_trimmed_R${mate}.fastq.gz"
      if [[ ! -e $src ]]; then
        echo "WARNING: expected Trim Galore output not found: ${src}" >&2
        continue
      fi
      mv -- "$src" "$dst"
    done
  done
}
rename_trimmed_outputs "$out" "$samplename"

echo -e "\n########## Starting FastQC of trimmed reads ##########\n"

# input/output
# FastQC runs on the renamed paired, trimmed reads of the current sample.
in=results/02A_trimgalore
in_R1="$in/${samplename}_paired_trimmed_R1.fastq.gz"
in_R2="$in/${samplename}_paired_trimmed_R2.fastq.gz"
out=results/02B_fastqc_trimmed
mkdir -p "$out" || { echo "ERROR: cannot create ${out}" >&2; exit 1; }

# main
# Default to one thread when PBS_NP is unset so -t always has an argument.
fastqc \
  "$in_R1" "$in_R2" \
  -o "$out" \
  -t "${PBS_NP:-1}" \
  || { echo "ERROR: FastQC failed for sample ${samplename}" >&2; exit 1; }

